from bs4 import BeautifulSoup
import requests
import time

def save_url(url):
    """Scrape one product-listing page and append product hrefs to goods.txt.

    Fetches *url*, decodes it as GBK (site encoding), writes the first link
    href of every ``<td class="lineheight">`` row to ``goods.txt`` (one per
    line), and returns the absolute URL of the next listing page, or ``None``
    when there is no "next page" link.

    Fixes vs. original:
    - removed leftover debug code (``print(bbb)`` + ``exit()``) that killed
      the process before any work was done;
    - ``next_page`` no longer raises NameError/IndexError when the row list
      is empty or the last page has no next-page link;
    - explicit ``html.parser`` avoids BeautifulSoup's parser-guessing warning.
    """
    ret = requests.get(url).content.decode('gbk', 'ignore')
    soup = BeautifulSoup(ret, 'html.parser')
    rows = soup.find_all('td', class_='lineheight')
    next_links = soup.find_all('a', text='下一页')

    with open('goods.txt', 'a') as w:
        for row in rows:
            links = row.find_all('a')
            if links:  # guard: skip rows without an anchor
                w.write(links[0]['href'])
                w.write("\n")

    if next_links:
        next_page = next_links[0].get('href')
        if next_page:
            return 'http://www.chinatimber.org' + next_page
    return None

def save_url_all():
    """Walk every product-listing page, starting from page 1.

    Each call to ``save_url`` appends that page's product links to
    ``goods.txt`` and yields the next page's URL; the crawl stops when
    ``save_url`` returns a falsy value. Pauses 1 s between pages and an
    extra 600 s after every 10th page to stay polite to the server.
    """
    current = save_url("http://www.chinatimber.org/mall/product.asp?page=1&ty=x")
    pages_done = 0
    while current:
        current = save_url(current)
        time.sleep(1)
        print(current)
        pages_done += 1
        # Long cool-down every ten pages (deliberate rate limiting).
        if pages_done % 10 == 0:
            time.sleep(600)

def save_detail():
    """Fetch the detail page for every href listed in goods.txt and append
    ``name&&&description`` lines to ``goods_detail.txt``.

    BUG FIX: the original appended its output to ``goods.txt`` — the same
    file it was reading line-by-line — so the reader would eventually consume
    its own freshly-written output as if it were more URLs. Details now go to
    a separate ``goods_detail.txt``.

    Also made robust against pages whose layout is missing the expected
    ``Contentbox`` / ``titlestyle`` / ``td`` elements (skipped instead of
    crashing with AttributeError), and an explicit parser is passed to
    BeautifulSoup.
    """
    count = 0
    with open('goods.txt', 'r') as r, open('goods_detail.txt', 'a') as w:
        for line in r:
            url = line.strip()
            if not url:
                continue  # tolerate blank lines in the URL file
            ret = requests.get('http://www.chinatimber.org/info/' + url).content.decode('gbk', 'ignore')
            soup = BeautifulSoup(ret, 'html.parser')
            content = soup.find('div', class_='Contentbox')
            name = soup.find('span', class_='titlestyle')
            if content is None or name is None:
                continue  # unexpected page layout — skip, don't crash
            td = content.find('td')
            if td is None:
                continue
            w.write(name.get_text() + '&&&' + td.get_text())
            w.write("\n")
            count += 1
            print(count)
            time.sleep(1)  # be polite: one request per second


if __name__ == '__main__':
    # Stage 1: crawl all listing pages and collect product hrefs into goods.txt.
    save_url_all()
    # Stage 2 (run separately after stage 1 completes): fetch each product's
    # detail page from the collected hrefs.
    # save_detail()